####Independent Expenditures Data####
library(tidyr)
library(dplyr)
library(ggplot2)
library(stats)
library(stargazer)
library(xtable)
library(foreign)
library(readstata13)
library(xlsx)
library(stringr)
library(devtools)
library(blscrapeR)
library(gdata)
library(openxlsx)
library(zoo)
library(tseries)
library(plm)
library(ecm)
library(tseries)
library(lmtest)
library(car)
library('pglm')

#setwd("~/Documents/Harvard/G_Semester2/GOV 2001/Rep_Articles/PrillamanRep")
setwd("/Users/alyssahuberts/Dropbox/DataFromScratch")

#Read in Final Data
FINAL<-read.csv("ExtensionFinal.csv", row.names=1)
FINAL<-filter(FINAL, Year > 1971 & Year < 2017 & State !="Nebraska" & State !="Alaska" & State !="Hawaii")

##########################################
#Considering Different Dependent Variables 
##########################################
#Business Tax levels as percent of GSP
state_object<- as.data.frame(cbind(state.abb, state.name, state.region))
FINAL$State <- as.character(FINAL$State)
state_object$state.name <- as.character(state_object$state.name)
FINAL$region <- state_object$state.region[match(FINAL$State, state_object$state.name)]
FINAL$region <- factor(FINAL$region, levels = c(1,2,3,4),  labels=c("Northeast", "South", "North Central", "West"))

library(gridExtra)
pdf(file = "Plots/gsp_sum_stats.pdf")
ggplot(data = FINAL[FINAL$Year>1987,]) + geom_point(aes(x = Year, y = busntaxprop, group = region, color = region)) +theme_bw() + ggtitle("State Business Taxes As Percentage of State GSP \n By Region") + labs(x = "Year", y = "State Business Tax \n (% of GSP)", color = "") + theme(title = element_text(size = 8))
dev.off()

load("tax_edited.Rdata")
tax$rev <- tax$busntax/tax$tottaxfull
tax$region <- state_object$state.region[match(tax$State, state_object$state.name)]
tax$region <- factor(tax$region, levels = c(1,2,3,4),  labels=c("Northeast", "South", "North Central", "West"))

tax_ouryears <- tax[tax$Year>1987,]
mean(tax_ouryears$rev)
mean( FINAL[FINAL$Year>1987,]$busntaxprop)
mean( FINAL[FINAL$Year>1987,]$totaxprop)



#Busines tax (absolute)
FINAL$busntax <- FINAL$busntaxprop*FINAL$GSP
pdf(file = "Plots/busntax_abs.pdf")
ggplot(data = FINAL[FINAL$Year>1987,]) + geom_point(aes(x = Year, y = busntax, group = region, color = region)) +theme_bw()  + labs(x = "Year", y = "State Business Tax (Absolute)", color = "") + theme(title = element_text(size = 8)) + scale_y_continuous(labels = comma)
dev.off()


#Busines Tax Per Capita
library(scales)
FINAL$busntax_percap <- FINAL$busntax/FINAL$poptotal
pdf(file = "Plots/busntax_pc.pdf")
ggplot(data = FINAL[FINAL$Year>1987,]) + geom_point(aes(x = Year, y = busntax_percap, group = region, color = region)) +theme_bw()  + labs(x = "Year", y = "State Business Tax (Absolute)", color = "") + theme(title = element_text(size = 8)) + scale_y_continuous(labels = comma)
dev.off()

#######################################################
# Model Selection: Correlograms
#######################################################
library(ggfortify)
library(gridExtra)
fs<-pdata.frame(FINAL, index=c("State", "Year"))##sets up panel data like xtset in stata

pdf(file = "Plots/correlogam_new.pdf", width = 9)
autoplot(acf(fs$busntax, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Business Tax Revenue")
dev.off()

autoplot(acf(fs$inc_pcap, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State Per Capital Income")
autoplot(acf(fs$UnemploymentRate, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State Unemployment Rate")
autoplot(acf(fs$CorpIndExpBan, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Corporate Independent Expenditure Bans")
autoplot(acf(fs$UnionIndExpBan, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Union Independent Expenditure Bans")
autoplot(acf(fs$RevenueLimit, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Revenue Limits")
autoplot(acf(fs$Union_pct, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Union Percentage")
autoplot(acf(fs$bowen_legprof_firstdim, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Leg Prof 1D")
autoplot(acf(fs$bowen_legprof_seconddim, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Leg Prof 2D")
autoplot(acf(fs$sen_dem_per_2pty, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Sen Dem ")
autoplot(acf(fs$gov_party, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Gov Party ")
autoplot(acf(fs$busntaxprop_neighb, plot = FALSE)) + theme_bw() + ggtitle("Autocorrelation in State \n Neighboring State \n Business Tax as Percentage of GSP")


#######################################################
# Model Selection: Running Analysis 
#######################################################

#Plain Vanilla OLS with fixed effects 
#Note that because this is linear, we can just use factor(State) and this is equivalent to "demeaning"
#Logged dependent variable
most_generic_fe_log <- lm(log(busntax) ~ inc_pcap + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                        RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                        gov_party + busntaxprop_neighb + factor(State) + log(poptotal) + log(GSP), data = fs)
AIC(most_generic_fe_log)
# 1160.116

#Same model, but with fixed effect for year as well
most_generic_fe_log_2 <- lm(log(busntax) ~ inc_pcap + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                            RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                            gov_party + busntaxprop_neighb + log(poptotal) + log(GSP)+ factor(State) + factor(Year), data = fs)
AIC(most_generic_fe_log_2)
# 1303.304


#Same model, but with lagged revenue
most_generic_fe_log_3 <- lm(log(busntax) ~ inc_pcap + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                              RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                              gov_party + busntaxprop_neighb+ 
                              log(poptotal) + log(GSP) + factor(State) + factor(Year) + lag(busntax), data = fs)
AIC(most_generic_fe_log_3)

# 1080.495

#Per Capita Dependent Variable
most_generic_fe_percap <- lm(busntax_percap ~ inc_pcap + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                        RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                        gov_party + busntaxprop_neighb log(poptotal)+ factor(State), data = fs)

AIC(most_generic_fe_percap)

stargazer(most_generic_fe_log, most_generic_fe_percap, omit = "State")

####ECM 
library(ecm)
#Regular ECM, no interactions
xeq <- xtr <- FINAL[,c("inc_pcap", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                       "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                       "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "logpop")]
model1 <- ecm(log(FINAL$busntax), xeq, xtr, includeIntercept=TRUE)
AIC(model1)
#AIC =  3031.646


#make relevant interactions 
FINAL$corp_indban_int_party <- FINAL$CorpIndExpBan*FINAL$sen_dem_per_2pty
FINAL$union_indban_int_party <- FINAL$UnionIndExpBan*FINAL$sen_dem_per_2pty
FINAL$corp_contban_int_party <-  FINAL$CorporateContBan*FINAL$sen_dem_per_2pty
FINAL$union_contban_int_party<-  FINAL$UnionContBan*FINAL$sen_dem_per_2pty
#Introduce the interactions
#Assume all predictors are needed in the equilibrium and transient terms of ecm
FINAL$logpop <- log(FINAL$poptotal)
xeq <- xtr <- FINAL[,c("inc_pcap", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                 "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                 "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "corp_indban_int_party", "union_indban_int_party", "union_contban_int_party", "logpop")]
model2 <- ecm(log(FINAL$busntax), xeq, xtr, includeIntercept=TRUE)
# 4282.7


####################################################################
#Splitting the Sample and Estimating the Models Against one another:
####################################################################
FINAL_EARLY <- FINAL[FINAL$Year < 2002,]
FINAL_LATE <- FINAL[FINAL$Year >2001,]

spec_1_fe <- lm(log(busntax) ~ inc_pcap + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                    RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                    gov_party + busntaxprop_neighb + log(poptotal) + factor(State), data = FINAL_EARLY)

xeq <- xtr <- FINAL_EARLY[,c("inc_pcap", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                       "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                       "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "corp_indban_int_party", "union_indban_int_party", "union_contban_int_party", "logpop")]
spec_2_ecm  <- ecm(log(FINAL_EARLY$busntax), xeq, xtr, includeIntercept=TRUE)

library(prediction)
pred_vals_fe <- prediction(spec_1_fe, data = FINAL_LATE,  calculate_se = TRUE)
pred_vals_ecm <- ecmpredict(spec_2_ecm, newdata = FINAL_LATE, init =25.49038 )

predicted <- cbind(FINAL_LATE, pred_vals_fe, pred_vals_ecm)
predicted$squared_error_fe <- (log(predicted$busntax) - predicted$fitted)^2
predicted$squared_error_ecm <- (log(predicted$busntax) - predicted$pred_vals_ecm)^2

mean(predicted$squared_error_fe)
mean(predicted$squared_error_ecm)

ggplot(data = predicted) + geom_point(aes(x = Year, y = log(busntax)), color = "seagreen2", alpha = .5) + geom_point(aes(x = Year, y = fitted), color = "cornflowerblue", alpha = .5) + geom_point(aes(x= Year, y = pred_vals_ecm), color = "salmon2", alpha = .5) + theme_bw()


###################################
#Figures and Tables for Paper 
###################################

#Fixed Effects for State
model1 <- lm(log(busntax) ~ log(inc_pcap) + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                            RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                            gov_party + busntaxprop_neighb + factor(State) + log(poptotal) + log(GSP), data = fs)
AIC(model1)
#1160.116

#Fixed Effects for State and Year
model2 <- lm(log(busntax) ~ log(inc_pcap) + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
                              RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
                              gov_party + busntaxprop_neighb + log(poptotal) + log(GSP)+ factor(State) + factor(Year), data = fs)
AIC(model2)
# 1082.966

#interctions

model3 <- lm(log(busntax) ~ log(inc_pcap) + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
               RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
               gov_party + busntaxprop_neighb +FINAL$corp_indban_int_party + FINAL$union_indban_int_party  +    FINAL$corp_contban_int_party + FINAL$union_contban_int_party +log(poptotal) + log(GSP)+ factor(State) + factor(Year), data = fs)

stargazer(model1, model2, model3, omit = c("State", "Year"))

#ECM with no interactions

FINAL$loginc <- log(FINAL$inc_pcap)
FINAL$logpop <- log(FINAL$poptotal)
xeq <- xtr <- FINAL[,c("loginc", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                       "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                       "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "logpop")]
model4 <- ecm(log(FINAL$busntax), xeq, xtr, includeIntercept=TRUE)
AIC(model4)
#2553.344


#ECM with interactions
xeq <- xtr <- FINAL[,c("loginc", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                       "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                       "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "corp_indban_int_party", "union_indban_int_party", "union_contban_int_party", "logpop")]
model5 <- ecm(log(FINAL$busntax), xeq, xtr, includeIntercept=TRUE)
AIC(model5)
#2505.13
stargazer(model4, model5)
library(stargazer)

get_names <- names(coef(model2))
get_names[2:14] <- c("Log Per Capita Income", "Unemployment Rate", "Corporate Independent Expenditure Ban", "Union Independent Expenditure Ban","Revenue Limit", "Union Membership", "Bowen (Dimension 1)", "Bowen (Dimension 2)", "Percent Democrat Senate", "Governor's Party", "Neighbor State Business Taxes", "Log POpulation", "Log GSP")
stargazer(model2, omit = c("State", "Year"), covariate.labels = get_names, dep.var.labels = "Log Business Tax Revenue")
#################
#Predicting Data
##################
FINAL_EARLY <- FINAL[FINAL$Year < 2002,]
FINAL_LATE <- FINAL[FINAL$Year >2001,]

predmodel1 <- lm(log(busntax) ~ log(inc_pcap) + UnemploymentRate + CorpIndExpBan + UnionIndExpBan + 
               RevenueLimit + Union_Pct + bowen_legprof_firstdim + bowen_legprof_seconddim + sen_dem_per_2pty + 
               gov_party + busntaxprop_neighb+ 
               log(poptotal) + log(GSP) + factor(State) , data = FINAL_EARLY)


xeq <- xtr <- FINAL_EARLY[,c("inc_pcap", "UnemploymentRate", "CorpIndExpBan", "UnionIndExpBan","CorporateContBan", "UnionContBan",
                       "SuperMajTaxRate", "SpendingLimit", "RevenueLimit","Union_Pct", "bowen_legprof_firstdim", "bowen_legprof_seconddim",
                       "sen_dem_per_2pty", "gov_party", "busntaxprop_neighb", "corp_indban_int_party", "union_indban_int_party", "union_contban_int_party", "logpop")]
predmodel2 <- ecm(log(FINAL_EARLY$busntax), xeq, xtr, includeIntercept=TRUE)

pred_vals_fe <- prediction(predmodel1, data = FINAL_LATE,  calculate_se = TRUE)
#prediction for ecm requires an initial value- i used mean from 2011
pred_vals_ecm <- ecmpredict(spec_2_ecm, newdata = FINAL_LATE, init =25.49038 )

predicted_fe <- cbind(FINAL_LATE, pred_vals_fe, pred_vals_ecm)
predicted$squared_error_fe <- (log(predicted$busntax) - predicted$fitted)^2
predicted$squared_error_ecm <- (log(predicted$busntax) - predicted$pred_vals_ecm)^2

mean(predicted$squared_error_fe)
mean(predicted$squared_error_ecm)
pdf(file = "Plots/predictions.pdf")
ggplot(data = predicted) + geom_point(aes(x = Year, y = log(busntax),color = "True Values") , alpha = .5) + geom_point(aes(x = Year, y = fitted, color = "Fixed Effects"), alpha = .5) + geom_point(aes(x= Year, y = pred_vals_ecm, color = "ECM"), alpha = .5) + theme_bw()  + scale_colour_manual("Predicted Values", values=c("seagreen2", "cornflowerblue", "salmon2")) + ggtitle("Predicted Values \n Fixed Effects vs. ECM") + labs(x = "Year", y = "Logged Business") 
dev.off()


